{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 关于梯度的计算调试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.random.seed(666)\n",
    "X = np.random.random(size=(1000, 10))\n",
    "\n",
    "true_theta = np.arange(1, 12, dtype=float)\n",
    "X_b = np.hstack([np.ones((len(X), 1)), X])\n",
    "y = X_b.dot(true_theta) + np.random.normal(size=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "true_theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1000, 10)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1000,)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def J(theta, X_b, y):\n",
    "    try:\n",
    "        return np.sum((y - X_b.dot(theta))**2) / len(X_b)\n",
    "    except:\n",
    "        return float('inf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def dJ_math(theta, X_b, y):\n",
    "    return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def dJ_debug(theta, X_b, y, epsilon=0.01):\n",
    "    res = np.empty(len(theta))\n",
    "    for i in range(len(theta)):\n",
    "        theta_1 = theta.copy()\n",
    "        theta_1[i] += epsilon\n",
    "        theta_2 = theta.copy()\n",
    "        theta_2[i] -= epsilon\n",
    "        res[i] = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2 * epsilon)\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters = 1e4, epsilon=1e-8):\n",
    "    \n",
    "    theta = initial_theta\n",
    "    cur_iter = 0\n",
    "\n",
    "    while cur_iter < n_iters:\n",
    "        gradient = dJ(theta, X_b, y)\n",
    "        last_theta = theta\n",
    "        theta = theta - eta * gradient\n",
    "        if(abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):\n",
    "            break\n",
    "            \n",
    "        cur_iter += 1\n",
    "\n",
    "    return theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 13.8 s, sys: 283 ms, total: 14.1 s\n",
      "Wall time: 7.6 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([  1.1251597 ,   2.05312521,   2.91522497,   4.11895968,\n",
       "         5.05002117,   5.90494046,   6.97383745,   8.00088367,\n",
       "         8.86213468,   9.98608331,  10.90529198])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_b = np.hstack([np.ones((len(X), 1)), X])\n",
    "initial_theta = np.zeros(X_b.shape[1])\n",
    "eta = 0.01\n",
    "\n",
    "%time theta = gradient_descent(dJ_debug, X_b, y, initial_theta, eta)\n",
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.57 s, sys: 30.6 ms, total: 1.6 s\n",
      "Wall time: 856 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([  1.1251597 ,   2.05312521,   2.91522497,   4.11895968,\n",
       "         5.05002117,   5.90494046,   6.97383745,   8.00088367,\n",
       "         8.86213468,   9.98608331,  10.90529198])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time theta = gradient_descent(dJ_math, X_b, y, initial_theta, eta)\n",
    "theta"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
